This document describes how to go from raw vcftools output of diversity metrics (Fst, pi, and Tajima’s D) to Manhattan plots, including making figures for the manuscript associated with this repo. This script is a result of brute forcing things to work: I am certainly no expert, and thus lots of the notes refer to my own dumb mistakes.
library(tidyverse)
library(qqman)
library(scales)
First, read in raw output files from vcftools (in this repo, see filter-scan.sh for code to generate).
fst.UKUS.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_UKUS_50kb.windowed.weir.fst",sep="\t"))
fst.AUUK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_AUUK_50kb.windowed.weir.fst",sep="\t"))
fst.USAU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_AUUS_50kb.windowed.weir.fst",sep="\t"))
pi.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_UK_pi_50kb.windowed.pi",sep="\t"))
pi.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_AU_pi_50kb.windowed.pi",sep="\t"))
pi.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_US_pi_50kb.windowed.pi",sep="\t"))
TajD.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_UK_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_AU_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_US_TajimaD_50kb.Tajima.D",sep="\t"))
vcftools outputs a few identifies for position: “CHROM,” “BIN_START” and “BIN_END” for .fst and .pi files, but only “CHROM” AND “BIN_START” for .Tajima.D files. Unfortunately, the numbering is off, so we’ll add 1 to every “BIN_START” in the .Tajima.D files.
head(TajD.AU.50kb)
## # A tibble: 6 x 4
## CHROM BIN_START N_SNPS TajimaD
## <fct> <int> <int> <dbl>
## 1 10 0 147 0.583
## 2 10 50000 334 0.372
## 3 10 100000 301 0.836
## 4 10 150000 98 1.56
## 5 10 200000 102 -0.557
## 6 10 250000 139 -0.0356
head(fst.AUUK.50kb)
## # A tibble: 6 x 6
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST MEAN_FST
## <fct> <int> <int> <int> <dbl> <dbl>
## 1 10 1 50000 164 0.00792 0.0105
## 2 10 50001 100000 379 0.0595 0.0500
## 3 10 100001 150000 311 0.00728 -0.000459
## 4 10 150001 200000 120 0.0162 0.0275
## 5 10 200001 250000 121 0.126 0.0926
## 6 10 250001 300000 155 0.109 0.0747
TajD.UK.50kb$BIN_START <- TajD.UK.50kb$BIN_START + 1
TajD.AU.50kb$BIN_START <- TajD.AU.50kb$BIN_START + 1
TajD.US.50kb$BIN_START <- TajD.US.50kb$BIN_START + 1
Now BIN_START should match. To use dplyr, we’ll need a column that specifes a unique position in the genome. Create a new column that joins “CHROM” and “BIN_START” so that we match each value based on actual position in the genome.
fst.UKUS.50kb$POS_ID <- paste(fst.UKUS.50kb$CHROM,fst.UKUS.50kb$BIN_START,sep="-")
fst.AUUK.50kb$POS_ID <- paste(fst.AUUK.50kb$CHROM,fst.AUUK.50kb$BIN_START,sep="-")
fst.USAU.50kb$POS_ID <- paste(fst.USAU.50kb$CHROM,fst.USAU.50kb$BIN_START,sep="-")
pi.UK.50kb$POS_ID <- paste(pi.UK.50kb$CHROM,pi.UK.50kb$BIN_START,sep="-")
pi.AU.50kb$POS_ID <- paste(pi.AU.50kb$CHROM,pi.AU.50kb$BIN_START,sep="-")
pi.US.50kb$POS_ID <- paste(pi.US.50kb$CHROM,pi.US.50kb$BIN_START,sep="-")
TajD.UK.50kb$POS_ID <- paste(TajD.UK.50kb$CHROM,TajD.UK.50kb$BIN_START,sep="-")
TajD.AU.50kb$POS_ID <- paste(TajD.AU.50kb$CHROM,TajD.AU.50kb$BIN_START,sep="-")
TajD.US.50kb$POS_ID <- paste(TajD.US.50kb$CHROM,TajD.US.50kb$BIN_START,sep="-")
Now drop column names that we no longer need, otherwise we’ll have a giant table after all the merging.
fst.AUUK.50kb <- fst.AUUK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
fst.USAU.50kb <- fst.USAU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.UK.50kb <- pi.UK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.AU.50kb <- pi.AU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.US.50kb <- pi.US.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
TajD.UK.50kb <- TajD.UK.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.AU.50kb <- TajD.AU.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.US.50kb <- TajD.US.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
We’ll join tables based on the unique position identified (POS_ID). The rename() command ensures that each new column header specifies population (new name first). Heads up: each time you join two tables, those two tables are no longer accessible except as a joined table!
fstUKUS.fstAUUK.50kb <- left_join(fst.UKUS.50kb,fst.AUUK.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UKUS","_AUUK"))
fst.50kb <- left_join(fstUKUS.fstAUUK.50kb,fst.USAU.50kb, by = "POS_ID", copy = FALSE, suffix=c("","_USAU"))
fst.50kb <- rename(fst.50kb, WEIGHTED_FST_USAU = WEIGHTED_FST)
fst.50kb <- rename(fst.50kb, MEAN_FST_USAU = MEAN_FST)
fst.50kb.piUK <- left_join(fst.50kb,pi.UK.50kb, by = "POS_ID", copy = FALSE)
fst.50kb.piUK.piUS <- left_join(fst.50kb.piUK,pi.US.50kb, by = "POS_ID", copy = FALSE,suffix=c("_UK","_US"))
fst.pi.50kb <- left_join(fst.50kb.piUK.piUS,pi.AU.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb <- rename(fst.pi.50kb, PI_AU = PI)
fst.pi.50kb.TajDUK <- left_join(fst.pi.50kb,TajD.UK.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb.TajDUK.TajDUS <- left_join(fst.pi.50kb.TajDUK,TajD.US.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UK","_US"))
div <- left_join(fst.pi.50kb.TajDUK.TajDUS,TajD.AU.50kb, by = "POS_ID", copy = FALSE)
div <- rename(div, TajimaD_AU = TajimaD)
We’re going to drop data from: * small scaffolds (which conveniently start with ‘KQ’ or ‘LNCF’, * any rows (positions) with missing data (NA), * and also coerce “negative” FST or pi to zero. We replace POS_ID (which was converted to numeric) with SNP. For the qqman package, chromosomes need to be a numeric value, so we also use lapply() below to rename chromosomes.
div <- filter(div, !grepl('KQ',CHROM))
div <- filter(div, !grepl('LNCF',CHROM))
div <- filter(div, !grepl('Unknown',CHROM))
div <- div %>% drop_na()
div[,c(5:6)][div[,c(5:6)] < 0] <- 0
div[,c(8:14)][div[,c(8:14)] < 0] <- 0
div <- data.frame(lapply(div, function(x) {gsub("1A", "1.25", x)}))
div <- data.frame(lapply(div, function(x) {gsub("1B", "1.75", x)}))
div <- data.frame(lapply(div, function(x) {gsub("4A", "4.5", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LG5", "28", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LGE22", "29", x)}))
div <- data.frame(lapply(div, function(x) {gsub("Z", "0", x)}))
indx <- sapply(div, is.factor)
div[indx] <- lapply(div[indx], function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
div <- div %>% select(-POS_ID)
div$SNP <- seq.int(nrow(div))
str(div)
## 'data.frame': 20071 obs. of 17 variables:
## $ CHROM : num 10 10 10 10 10 10 10 10 10 10 ...
## $ BIN_START : num 1 50001 100001 150001 200001 ...
## $ BIN_END : num 50000 100000 150000 200000 250000 300000 350000 400000 450000 500000 ...
## $ N_VARIANTS : num 165 377 311 120 121 156 80 101 24 18 ...
## $ WEIGHTED_FST_UKUS: num 0.01745 0.03751 0 0 0.00586 ...
## $ MEAN_FST_UKUS : num 0.0133 0.0272 0 0 0 ...
## $ WEIGHTED_FST_AUUK: num 0.00792 0.05948 0.00728 0.01624 0.12564 ...
## $ MEAN_FST_AUUK : num 0.0105 0.05 0 0.0275 0.0926 ...
## $ WEIGHTED_FST_USAU: num 0.01291 0.00258 0 0 0.00315 ...
## $ MEAN_FST_USAU : num 0.0093 0.00585 0 0 0 ...
## $ PI_UK : num 0.001053 0.002189 0.002212 0.000915 0.000879 ...
## $ PI_US : num 0.001063 0.002373 0.002003 0.000829 0.0007 ...
## $ PI_AU : num 0.001024 0.002211 0.0022 0.000804 0.000532 ...
## $ TajimaD_UK : num 0.808 0.293 0.877 1.426 0.968 ...
## $ TajimaD_US : num 0.42348 0.58012 0.80239 1.05163 -0.00424 ...
## $ TajimaD_AU : num 0.583 0.372 0.836 1.557 -0.557 ...
## $ SNP : int 1 2 3 4 5 6 7 8 9 10 ...
Adding in a calculation here so that we don’t have to repeat it when filtering div table.
div$piUK.piAU <- div$PI_UK - div$PI_AU
div$piUK.piUS <- div$PI_UK - div$PI_US
Now we’re ready to plot and calculate genome-wide values!
First, we look at the distribution of variation across the genome.
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.UKUS.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen
## 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.AUUK.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen
## 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.USAU.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen
## 2
quantile(div$WEIGHTED_FST_AUUK, c(.9,.95,.99,.999))
## 90% 95% 99% 99.9%
## 0.0676462 0.0862444 0.1490122 0.3077638
quantile(div$WEIGHTED_FST_UKUS, c(.9,.95,.99,.999))
## 90% 95% 99% 99.9%
## 0.0441469 0.0609766 0.1156152 0.2228447
mean(div$WEIGHTED_FST_AUUK) + 5*sd(div$WEIGHTED_FST_AUUK)
## [1] 0.1936444
mean(div$WEIGHTED_FST_UKUS) + 5*sd(div$WEIGHTED_FST_UKUS)
## [1] 0.1406712
div.outliers.AUUK <- div[which(div$WEIGHTED_FST_AUUK > quantile(div$WEIGHTED_FST_AUUK,.99)),]
div.outliers.USUK <- div[which(div$WEIGHTED_FST_UKUS > quantile(div$WEIGHTED_FST_UKUS,.99)),]
div.hifst.AUUK <- div[which(div$WEIGHTED_FST_AUUK > 0.1),]
div.hifst.UKUS <- div[which(div$WEIGHTED_FST_UKUS > 0.1),]
unique(div.outliers.USUK$CHROM)
## [1] 11.00 12.00 13.00 17.00 19.00 1.25 1.00 28.00 2.00 3.00 4.50 4.00
## [13] 6.00 29.00 0.00
length(div.outliers.USUK$SNP)
## [1] 201
write.csv(div.outliers.USUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.USUK.csv")
unique(div.outliers.AUUK$CHROM)
## [1] 10.00 12.00 13.00 17.00 18.00 1.25 1.00 23.00 27.00 2.00 3.00 4.50
## [13] 4.00 5.00 6.00 7.00 8.00 0.00
length(div.outliers.AUUK$SNP)
## [1] 201
write.csv(div.outliers.AUUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.AUUK.csv")
What’s going on w/ other metrics at these outliers?
summary(div.outliers.AUUK$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0000730 0.0001890 0.0009734 0.0014188 0.0057224
summary(div.outliers.AUUK$PI_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.383e-05 7.067e-05 2.285e-04 9.324e-04 1.447e-03 5.313e-03
summary(div.outliers.AUUK$TajimaD_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.3050 0.2574 0.7501 0.6479 1.2528 2.6922
summary(div.outliers.AUUK$TajimaD_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.2937 0.1481 0.6493 0.5926 1.0098 2.3588
summary(div.outliers.USUK$PI_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.167e-06 6.250e-05 2.890e-04 8.204e-04 1.281e-03 5.202e-03
summary(div.outliers.USUK$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000210 0.0000765 0.0002912 0.0008650 0.0012335 0.0058409
summary(div.outliers.USUK$TajimaD_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.1488 -0.4076 0.4343 0.3082 0.9422 2.9728
summary(div.outliers.USUK$TajimaD_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9252 0.3047 0.8963 0.8645 1.4612 2.5895
library(fitdistrplus)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: survival
descdist(div.outliers.AUUK$PI_UK)
## summary statistics
## ------
## min: 2.5e-06 max: 0.00572238
## median: 0.000189
## mean: 0.0009734123
## estimated sd: 0.001348425
## estimated skewness: 1.597318
## estimated kurtosis: 4.653
descdist(div.outliers.AUUK$PI_AU)
## summary statistics
## ------
## min: 1.38333e-05 max: 0.00531275
## median: 0.0002285
## mean: 0.0009324382
## estimated sd: 0.001252321
## estimated skewness: 1.532558
## estimated kurtosis: 4.458218
descdist(div.outliers.USUK$PI_UK)
## summary statistics
## ------
## min: 2.1e-05 max: 0.00584092
## median: 0.000291167
## mean: 0.000865046
## estimated sd: 0.001150262
## estimated skewness: 1.693987
## estimated kurtosis: 5.349374
descdist(div.outliers.USUK$PI_US)
## summary statistics
## ------
## min: 7.16667e-06 max: 0.00520175
## median: 0.000289007
## mean: 0.0008203641
## estimated sd: 0.001078227
## estimated skewness: 1.65203
## estimated kurtosis: 5.131592
#beta.outliers.AUUK <- fitdist(div.outliers.AUUK$PI_AU, "beta")
#summary(beta.outliers.AUUK)
#beta.outliers.USUK <- fitdist(div.outliers.USUK$PI_US, "beta")
#summary(beta.outliers.USUK)
div.outliers.AUUK.lowFstUSAU <- div.outliers.AUUK[which(div.outliers.AUUK$WEIGHTED_FST_USAU < 0.01 ),]
div.outliers.AUUK.lowFstUSAU
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 7 10.00 300001 350000 80 0.1027220 0.0600280
## 9 10.00 400001 450000 24 0.0578802 0.0408439
## 10 10.00 450001 500000 18 0.0641170 0.0438801
## 903 12.00 3500001 3550000 313 0.1841560 0.1390400
## 1414 13.00 8300001 8350000 6 0.0888562 0.0764819
## 2251 17.00 2350001 2400000 90 0.0718635 0.0453814
## 3399 1.25 25150001 25200000 80 0.1291380 0.1210420
## 3400 1.25 25200001 25250000 65 0.0763648 0.0755731
## 3793 1.25 44850001 44900000 7 0.1502550 0.1512780
## 3946 1.25 52500001 52550000 9 0.1002160 0.0841356
## 13219 4.50 6100001 6150000 132 0.1381170 0.1304820
## 13220 4.50 6150001 6200000 84 0.1382960 0.1325750
## 16265 6.00 5500001 5550000 61 0.2868590 0.2827380
## 16267 6.00 5600001 5650000 66 0.2488270 0.2538260
## 16268 6.00 5650001 5700000 90 0.2626990 0.2607650
## 16271 6.00 5800001 5850000 95 0.1569920 0.1609480
## 19739 0.00 51900001 51950000 60 0.1874130 0.1492570
## 19962 0.00 63050001 63100000 232 0.1055780 0.0686902
## 19999 0.00 64900001 64950000 186 0.1765470 0.1102810
## 20000 0.00 64950001 65000000 240 0.2637760 0.1947360
## 20002 0.00 65050001 65100000 450 0.1136620 0.0975800
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 7 0.190274 0.122576 0.000000000 0.000000000
## 9 0.150496 0.109492 0.000000000 0.000000000
## 10 0.197232 0.155583 0.000000000 0.000000000
## 903 0.172857 0.132176 0.000000000 0.000000000
## 1414 0.165927 0.125393 0.004134130 0.019587600
## 2251 0.179935 0.149568 0.000000000 0.000475237
## 3399 0.187165 0.173330 0.000000000 0.000000000
## 3400 0.179424 0.168180 0.000000000 0.000000000
## 3793 0.192503 0.166546 0.008088430 0.000000000
## 3946 0.157895 0.133751 0.000000000 0.000000000
## 13219 0.149469 0.141418 0.000000000 0.000000000
## 13220 0.164826 0.153364 0.000000000 0.000000000
## 16265 0.178806 0.178051 0.000000000 0.000000000
## 16267 0.155095 0.162605 0.000000000 0.000000000
## 16268 0.172276 0.167504 0.000000000 0.010494800
## 16271 0.155570 0.151752 0.004985280 0.006458370
## 19739 0.158887 0.126400 0.006943530 0.004539620
## 19962 0.159585 0.111195 0.000000000 0.003472230
## 19999 0.250744 0.187151 0.004209500 0.002449910
## 20000 0.230248 0.162655 0.000000000 0.000000000
## 20002 0.171135 0.133933 0.000369775 0.000000000
## PI_UK PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU
## 7 5.80833e-04 3.92847e-04 2.90669e-04 1.010540 -0.569214 -1.333590
## 9 1.45667e-04 1.08668e-04 7.26667e-05 0.674272 -0.938436 -1.463810
## 10 1.15667e-04 7.03333e-05 3.06667e-05 0.510990 -1.402140 -0.520315
## 903 1.58583e-03 2.29027e-03 2.38120e-03 0.322469 1.214250 1.230810
## 1414 3.53333e-05 4.86667e-05 5.13333e-05 -0.078611 2.026320 1.435800
## 2251 5.73500e-04 5.66689e-04 5.08838e-04 0.877600 0.367470 0.645267
## 3399 6.12500e-04 8.16000e-04 7.46855e-04 1.160520 2.972800 2.474060
## 3400 4.95167e-04 6.38006e-04 5.69339e-04 1.386520 2.737810 2.391710
## 3793 6.46667e-05 4.11669e-05 4.38335e-05 1.877030 0.118964 0.239407
## 3946 7.31667e-05 6.43336e-05 5.60000e-05 1.281030 0.765878 0.581803
## 13219 1.16983e-03 4.09190e-04 3.88846e-04 2.126910 -2.148800 -2.179840
## 13220 7.55674e-04 2.66003e-04 2.38502e-04 2.154800 -2.022640 -2.304990
## 16265 2.88333e-05 5.33346e-04 4.75333e-04 1.723710 2.056500 1.339670
## 16267 5.08333e-05 5.92009e-04 5.25841e-04 0.721746 2.161640 1.510490
## 16268 6.93333e-05 7.88034e-04 6.67837e-04 -1.293670 2.136270 1.375800
## 16271 1.58333e-04 6.64365e-04 7.56515e-04 -0.174387 1.129640 1.427160
## 19739 3.98671e-04 3.79014e-04 4.32010e-04 1.206870 0.930709 1.050540
## 19962 1.55317e-03 1.37795e-03 1.14304e-03 0.640391 0.573849 0.685643
## 19999 8.16667e-04 1.26139e-03 1.36611e-03 -0.243400 0.635922 1.156450
## 20000 1.24650e-03 1.72167e-03 1.74439e-03 -0.207223 1.104400 1.037880
## 20002 2.43118e-03 3.10507e-03 2.86397e-03 0.924093 0.736868 0.386692
## SNP piUK.piAU piUK.piUS
## 7 7 0.0002901640 0.0001879860
## 9 9 0.0000730003 0.0000369990
## 10 10 0.0000850003 0.0000453337
## 903 903 -0.0007953700 -0.0007044400
## 1414 1414 -0.0000160000 -0.0000133334
## 2251 2251 0.0000646620 0.0000068110
## 3399 3399 -0.0001343550 -0.0002035000
## 3400 3400 -0.0000741720 -0.0001428390
## 3793 3793 0.0000208332 0.0000234998
## 3946 3946 0.0000171667 0.0000088331
## 13219 13219 0.0007809840 0.0007606400
## 13220 13220 0.0005171720 0.0004896710
## 16265 16265 -0.0004464997 -0.0005045127
## 16267 16267 -0.0004750077 -0.0005411757
## 16268 16268 -0.0005985037 -0.0007187007
## 16271 16271 -0.0005981820 -0.0005060320
## 19739 19739 -0.0000333390 0.0000196570
## 19962 19962 0.0004101300 0.0001752200
## 19999 19999 -0.0005494430 -0.0004447230
## 20000 20000 -0.0004978900 -0.0004751700
## 20002 20002 -0.0004327900 -0.0006738900
length(div.outliers.AUUK.lowFstUSAU$SNP)
## [1] 21
div.outliers.USUK.lowFstUSAU <- div.outliers.USUK[which(div.outliers.USUK$WEIGHTED_FST_USAU < 0.01 ),]
div.outliers.USUK.lowFstUSAU
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 903 12.00 3500001 3550000 313 0.184156 0.1390400
## 3399 1.25 25150001 25200000 80 0.129138 0.1210420
## 3772 1.25 43800001 43850000 9 0.233478 0.2201170
## 3773 1.25 43850001 43900000 9 0.164466 0.1563530
## 3774 1.25 43900001 43950000 8 0.222953 0.2224730
## 3781 1.25 44250001 44300000 8 0.198336 0.1773510
## 3788 1.25 44600001 44650000 8 0.183673 0.1641710
## 3790 1.25 44700001 44750000 9 0.198634 0.1724300
## 3791 1.25 44750001 44800000 4 0.126050 0.1050630
## 3792 1.25 44800001 44850000 5 0.158329 0.1520360
## 3793 1.25 44850001 44900000 7 0.150255 0.1512780
## 3826 1.25 46500001 46550000 16 0.119742 0.1104310
## 3941 1.25 52250001 52300000 6 0.144998 0.1072330
## 3950 1.25 52700001 52750000 3 0.122153 0.0888192
## 6514 1.00 106850001 106900000 40 0.132203 0.1245470
## 13217 4.50 6000001 6050000 87 0.127251 0.1166620
## 13218 4.50 6050001 6100000 122 0.124097 0.1208020
## 13219 4.50 6100001 6150000 132 0.138117 0.1304820
## 13220 4.50 6150001 6200000 84 0.138296 0.1325750
## 14057 4.00 27600001 27650000 43 0.124484 0.1000850
## 14058 4.00 27650001 27700000 15 0.124772 0.1110270
## 14243 4.00 36900001 36950000 120 0.144461 0.1311450
## 16262 6.00 5350001 5400000 23 0.152651 0.1426320
## 16265 6.00 5500001 5550000 61 0.286859 0.2827380
## 16266 6.00 5550001 5600000 76 0.236749 0.2467770
## 16267 6.00 5600001 5650000 66 0.248827 0.2538260
## 16268 6.00 5650001 5700000 90 0.262699 0.2607650
## 16269 6.00 5700001 5750000 118 0.200669 0.1998990
## 16270 6.00 5750001 5800000 84 0.189015 0.1918170
## 16271 6.00 5800001 5850000 95 0.156992 0.1609480
## 19081 0.00 18950001 19000000 222 0.189965 0.1459400
## 19082 0.00 19000001 19050000 193 0.145196 0.1232680
## 19739 0.00 51900001 51950000 60 0.187413 0.1492570
## 19800 0.00 54950001 55000000 332 0.138968 0.1067000
## 19932 0.00 61550001 61600000 113 0.165185 0.1225240
## 19999 0.00 64900001 64950000 186 0.176547 0.1102810
## 20000 0.00 64950001 65000000 240 0.263776 0.1947360
## 20001 0.00 65000001 65050000 277 0.128128 0.0954193
## 20006 0.00 65250001 65300000 194 0.175280 0.1503040
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 903 0.1728570 0.1321760 0.00000000 0.00000000
## 3399 0.1871650 0.1733300 0.00000000 0.00000000
## 3772 0.1356170 0.1306400 0.00000000 0.00000000
## 3773 0.0852238 0.0765575 0.00000000 0.00000000
## 3774 0.1335340 0.1317840 0.00000000 0.00000000
## 3781 0.1177940 0.1036950 0.00646088 0.00000000
## 3788 0.1177940 0.1036950 0.00209059 0.00000000
## 3790 0.1181560 0.1062690 0.00000000 0.00000000
## 3791 0.1087810 0.0889597 0.00000000 0.00000000
## 3792 0.1012870 0.1007000 0.00000000 0.00000000
## 3793 0.1925030 0.1665460 0.00808843 0.00000000
## 3826 0.1379380 0.1391420 0.00000000 0.00899771
## 3941 0.0853294 0.0588546 0.00000000 0.00000000
## 3950 0.0778325 0.0539009 0.00000000 0.00000000
## 6514 0.1120650 0.1021280 0.00000000 0.00000000
## 13217 0.1235350 0.1125110 0.00000000 0.00000000
## 13218 0.1407820 0.1299840 0.00000000 0.00000000
## 13219 0.1494690 0.1414180 0.00000000 0.00000000
## 13220 0.1648260 0.1533640 0.00000000 0.00000000
## 14057 0.0421729 0.0368661 0.00000000 0.00000000
## 14058 0.0708094 0.0544110 0.00000000 0.00000000
## 14243 0.0396591 0.0321878 0.00000000 0.00000000
## 16262 0.1195050 0.1207770 0.00000000 0.00000000
## 16265 0.1788060 0.1780510 0.00000000 0.00000000
## 16266 0.1419130 0.1501990 0.00000000 0.00000000
## 16267 0.1550950 0.1626050 0.00000000 0.00000000
## 16268 0.1722760 0.1675040 0.00000000 0.01049480
## 16269 0.1428110 0.1457580 0.00000000 0.00000000
## 16270 0.1452970 0.1502240 0.00000000 0.00000000
## 16271 0.1555700 0.1517520 0.00498528 0.00645837
## 19081 0.0750444 0.0538881 0.00000000 0.00000000
## 19082 0.0193002 0.0267927 0.00000000 0.00000000
## 19739 0.1588870 0.1264000 0.00694353 0.00453962
## 19800 0.0791149 0.0666468 0.00000000 0.00000000
## 19932 0.0833852 0.0580127 0.00000000 0.00000000
## 19999 0.2507440 0.1871510 0.00420950 0.00244991
## 20000 0.2302480 0.1626550 0.00000000 0.00000000
## 20001 0.1301150 0.1041760 0.00000000 0.00000000
## 20006 0.1216110 0.1062280 0.00000000 0.00000000
## PI_UK PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU
## 903 1.58583e-03 2.29027e-03 2.38120e-03 0.322469 1.214250 1.2308100
## 3399 6.12500e-04 8.16000e-04 7.46855e-04 1.160520 2.972800 2.4740600
## 3772 9.01667e-05 2.65000e-05 4.65000e-05 2.432020 -1.878520 -0.5244280
## 3773 8.30000e-05 4.03333e-05 4.91667e-05 1.946800 -0.941941 -0.3438820
## 3774 8.16667e-05 2.76668e-05 4.20002e-05 2.500830 -1.353500 -0.3733890
## 3781 7.41667e-05 2.76667e-05 3.45000e-05 1.940080 -1.536610 -0.6423260
## 3788 7.41667e-05 2.98333e-05 3.45000e-05 1.940080 -1.374610 -0.6423260
## 3790 8.61667e-05 2.65001e-05 3.85002e-05 2.161200 -1.833620 -0.6350750
## 3791 3.55000e-05 1.96667e-05 2.45000e-05 1.478110 -0.576488 0.0507059
## 3792 4.75000e-05 2.45000e-05 3.60003e-05 1.898740 -0.616373 0.2162880
## 3793 6.46667e-05 4.11669e-05 4.38335e-05 1.877030 0.118964 0.2394070
## 3826 1.56500e-04 1.14501e-04 1.17334e-04 2.455710 0.890794 1.0316700
## 3941 4.66669e-05 1.55001e-05 2.20002e-05 1.110000 -1.317040 -0.6062470
## 3950 2.56667e-05 1.18333e-05 1.58333e-05 1.216020 -1.001800 -0.3605060
## 6514 2.03833e-04 3.69672e-04 3.52672e-04 -0.557074 2.328280 2.7553400
## 13217 7.23833e-04 2.57341e-04 3.00170e-04 1.775250 -2.075340 -1.8571700
## 13218 1.04117e-03 4.05516e-04 3.68677e-04 2.007180 -1.910490 -2.1351400
## 13219 1.16983e-03 4.09190e-04 3.88846e-04 2.126910 -2.148800 -2.1798400
## 13220 7.55674e-04 2.66003e-04 2.38502e-04 2.154800 -2.022640 -2.3049900
## 14057 3.81000e-04 3.27002e-04 3.94502e-04 2.124500 1.261110 2.2358600
## 14058 1.20500e-04 9.08347e-05 1.06668e-04 1.303780 1.346540 1.2527000
## 14243 1.05652e-03 8.72538e-04 9.60703e-04 2.022060 1.051440 1.8017000
## 16262 5.05000e-05 1.48335e-04 1.75833e-04 -0.253609 0.363975 0.8790340
## 16265 2.88333e-05 5.33346e-04 4.75333e-04 1.723710 2.056500 1.3396700
## 16266 9.40005e-05 6.90347e-04 6.21012e-04 0.189762 2.226990 1.4170800
## 16267 5.08333e-05 5.92009e-04 5.25841e-04 0.721746 2.161640 1.5104900
## 16268 6.93333e-05 7.88034e-04 6.67837e-04 -1.293670 2.136270 1.3758000
## 16269 1.06667e-04 9.61546e-04 9.50343e-04 -1.925220 1.627650 1.3995400
## 16270 7.71667e-05 6.45525e-04 6.42512e-04 -0.682127 1.393540 1.2760900
## 16271 1.58333e-04 6.64365e-04 7.56515e-04 -0.174387 1.129640 1.4271600
## 19081 1.18901e-03 1.24461e-03 1.22436e-03 0.862748 0.758027 0.7976620
## 19082 1.11667e-03 1.30971e-03 1.30786e-03 2.029490 0.784797 1.0812900
## 19739 3.98671e-04 3.79014e-04 4.32010e-04 1.206870 0.930709 1.0505400
## 19800 2.37000e-03 1.99995e-03 2.30563e-03 1.246390 0.748836 1.2112400
## 19932 6.72170e-04 8.29064e-04 8.34358e-04 0.502783 1.071620 1.1047700
## 19999 8.16667e-04 1.26139e-03 1.36611e-03 -0.243400 0.635922 1.1564500
## 20000 1.24650e-03 1.72167e-03 1.74439e-03 -0.207223 1.104400 1.0378800
## 20001 1.53219e-03 1.96551e-03 2.01766e-03 0.304739 1.123120 1.3799400
## 20006 1.57217e-03 1.30438e-03 1.47139e-03 1.530390 1.099090 1.1772900
## SNP piUK.piAU piUK.piUS
## 903 903 -0.0007953700 -0.0007044400
## 3399 3399 -0.0001343550 -0.0002035000
## 3772 3772 0.0000436667 0.0000636667
## 3773 3773 0.0000338333 0.0000426667
## 3774 3774 0.0000396665 0.0000539999
## 3781 3781 0.0000396667 0.0000465000
## 3788 3788 0.0000396667 0.0000443334
## 3790 3790 0.0000476665 0.0000596666
## 3791 3791 0.0000110000 0.0000158333
## 3792 3792 0.0000114997 0.0000230000
## 3793 3793 0.0000208332 0.0000234998
## 3826 3826 0.0000391660 0.0000419990
## 3941 3941 0.0000246667 0.0000311668
## 3950 3950 0.0000098334 0.0000138334
## 6514 6514 -0.0001488390 -0.0001658390
## 13217 13217 0.0004236630 0.0004664920
## 13218 13218 0.0006724930 0.0006356540
## 13219 13219 0.0007809840 0.0007606400
## 13220 13220 0.0005171720 0.0004896710
## 14057 14057 -0.0000135020 0.0000539980
## 14058 14058 0.0000138320 0.0000296653
## 14243 14243 0.0000958170 0.0001839820
## 16262 16262 -0.0001253330 -0.0000978350
## 16265 16265 -0.0004464997 -0.0005045127
## 16266 16266 -0.0005270115 -0.0005963465
## 16267 16267 -0.0004750077 -0.0005411757
## 16268 16268 -0.0005985037 -0.0007187007
## 16269 16269 -0.0008436760 -0.0008548790
## 16270 16270 -0.0005653453 -0.0005683583
## 16271 16271 -0.0005981820 -0.0005060320
## 19081 19081 -0.0000353500 -0.0000556000
## 19082 19082 -0.0001911900 -0.0001930400
## 19739 19739 -0.0000333390 0.0000196570
## 19800 19800 0.0000643700 0.0003700500
## 19932 19932 -0.0001621880 -0.0001568940
## 19999 19999 -0.0005494430 -0.0004447230
## 20000 20000 -0.0004978900 -0.0004751700
## 20001 20001 -0.0004854700 -0.0004333200
## 20006 20006 0.0001007800 0.0002677900
length(div.outliers.USUK.lowFstUSAU$SNP)
## [1] 39
Possible parallel “selection” ?
What’s the statistical distribution of these values?
descdist(div$WEIGHTED_FST_AUUK)
## summary statistics
## ------
## min: 0 max: 0.401902
## median: 0.0265902
## mean: 0.03258447
## estimated sd: 0.03221199
## estimated skewness: 2.902543
## estimated kurtosis: 19.80308
descdist(div$WEIGHTED_FST_UKUS)
## summary statistics
## ------
## min: 0 max: 0.34149
## median: 0.0125022
## mean: 0.01873338
## estimated sd: 0.02438756
## estimated skewness: 3.325395
## estimated kurtosis: 22.98486
descdist(div$WEIGHTED_FST_USAU)
## summary statistics
## ------
## min: 0 max: 0.482831
## median: 0.0341419
## mean: 0.04037524
## estimated sd: 0.03744596
## estimated skewness: 3.350824
## estimated kurtosis: 25.99446
lab.AU <- rep("AU.UK",length(div$WEIGHTED_FST_AUUK))
lab.US <- rep("UK.US",length(div$WEIGHTED_FST_UKUS))
Fst.group <- c(lab.AU,lab.US)
Fst.hist.data <- c(div$WEIGHTED_FST_AUUK,div$WEIGHTED_FST_USUK)
Fst.hist <- data.frame(Fst = Fst.hist.data, population = Fst.group)
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Fst.pdf",width=4,height=3)
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
theme_classic() +
geom_density(alpha=0.5,lwd=0.5) +
scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) +
xlab("Fst") + labs(fill="Population") +
geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)
dev.off()
## quartz_off_screen
## 2
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
theme_classic() +
geom_density(alpha=0.5,lwd=0.5) +
scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) +
xlab("Fst") + labs(fill="Population") +
geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
xlab("") + ylab("") +
stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
xlab("") + ylab("") +
stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.AUUK.pdf",height=2,width=2)
ggplot(data=div) +
geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
summary(div$PI_AU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0027682 0.0040090 0.0038645 0.0050565 0.0138998
summary(div$PI_UK)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000025 0.0028343 0.0041312 0.0039757 0.0051943 0.0141869
summary(div$PI_US)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.667e-06 2.753e-03 4.016e-03 3.863e-03 5.045e-03 1.409e-02
descdist(div$PI_US)
## summary statistics
## ------
## min: 4.66667e-06 max: 0.0140869
## median: 0.00401555
## mean: 0.003862564
## estimated sd: 0.001783127
## estimated skewness: -0.1258447
## estimated kurtosis: 3.05688
#norm.piUS.fit <- fitdist(div$PI_US,distr="norm")
#summary(norm.piUS.fit)
descdist(div$PI_UK)
## summary statistics
## ------
## min: 2.5e-06 max: 0.0141869
## median: 0.00413121
## mean: 0.003975727
## estimated sd: 0.001837387
## estimated skewness: -0.1188124
## estimated kurtosis: 3.055468
#norm.piUK.fit <- fitdist(div$PI_UK,distr="norm")
#summary(norm.piUK.fit)
descdist(div$PI_AU)
## summary statistics
## ------
## min: 2.5e-06 max: 0.0138998
## median: 0.00400898
## mean: 0.003864496
## estimated sd: 0.001775347
## estimated skewness: -0.1396863
## estimated kurtosis: 3.030743
#norm.piAU.fit <- fitdist(div$PI_AU,distr="norm")
#summary(norm.piAU.fit)
lab.AU <- rep("AU",length(div$PI_AU))
lab.US <- rep("US",length(div$PI_US))
lab.UK <- rep("UK",length(div$PI_UK))
group <- c(lab.AU,lab.US,lab.UK)
pi.hist.data <- c(div$PI_UK,div$PI_US,div$PI_AU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame': 60213 obs. of 2 variables:
## $ pi : num 0.001053 0.002189 0.002212 0.000915 0.000879 ...
## $ population: Factor w/ 3 levels "AU","UK","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) +
xlab("Pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
theme(legend.position="none")
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Pi.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) +
xlab("Pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
theme(legend.position="none")
dev.off()
## quartz_off_screen
## 2
Average nucleotide diversity for both invasions is the same (0.003). There are two vertical lines overlaid in the plot above.
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_USvsUK.pdf",width=2,height=2)
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
ggplot(data=div.outliers.USUK) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/PiOutliers_USvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.USUK) +
geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
ggplot(data=div.outliers.AUUK) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).
dev.off()
## null device
## 1
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.AUUK) +
geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
xlab("") + ylab("") +
xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
theme(axis.text=element_text(size=7,colour="black")) +
stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).
dev.off()
## null device
## 1
descdist(div$TajimaD_US)
## summary statistics
## ------
## min: -2.1488 max: 3.16509
## median: 0.744616
## mean: 0.7115798
## estimated sd: 0.3466121
## estimated skewness: -1.309557
## estimated kurtosis: 13.47233
descdist(div$TajimaD_UK)
## summary statistics
## ------
## min: -2.2276 max: 3.23445
## median: 0.737875
## mean: 0.7152536
## estimated sd: 0.3263563
## estimated skewness: -0.4741872
## estimated kurtosis: 12.17869
descdist(div$TajimaD_AU)
## summary statistics
## ------
## min: -2.33194 max: 3.16824
## median: 0.798065
## mean: 0.7829885
## estimated sd: 0.3372602
## estimated skewness: -0.5499719
## estimated kurtosis: 12.87531
descdist(div.outliers.USUK$TajimaD_US)
## summary statistics
## ------
## min: -2.1488 max: 2.9728
## median: 0.434343
## mean: 0.3082123
## estimated sd: 1.017184
## estimated skewness: -0.2567428
## estimated kurtosis: 2.728269
descdist(div.outliers.AUUK$TajimaD_AU)
## summary statistics
## ------
## min: -2.30499 max: 2.69219
## median: 0.750086
## mean: 0.6478708
## estimated sd: 0.8718095
## estimated skewness: -0.7820734
## estimated kurtosis: 3.98327
What’s the difference in diversity between native and invasive ranges?
When comparing ancestral diversity to a bottlenecked invasive population, we’d expect to see lower diversity in the invasive range overall. This difference would be more pronounced in regions that had differentiated from the native range (e.g., where drift and/or selection are more pronounced).
Here, difference in pi > 0 means that diversity is higher in the native range than in the invasive.
summary(div$piUK.piAU)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.552e-03 -1.935e-05 9.278e-05 1.112e-04 2.319e-04 2.028e-03
qqnorm(div$piUK.piAU, pch = 16)
qqline(div$piUK.piAU, pch = 16)
descdist(div$piUK.piAU)
## summary statistics
## ------
## min: -0.00155244 max: 0.00202796
## median: 9.278e-05
## mean: 0.0001112317
## estimated sd: 0.0002326892
## estimated skewness: 0.4570582
## estimated kurtosis: 5.950369
summary(div$piUK.piUS)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.381e-03 -2.535e-06 1.035e-04 1.132e-04 2.277e-04 1.762e-03
qqnorm(div$piUK.piUS, pch = 16)
qqline(div$piUK.piUS, pch = 16)
descdist(div$piUK.piUS)
## summary statistics
## ------
## min: -0.00138117 max: 0.00176181
## median: 0.000103491
## mean: 0.0001131636
## estimated sd: 0.0002065841
## estimated skewness: 0.1527603
## estimated kurtosis: 5.842408
div.hiUKpi.vsAU <- div[which(div$piUK.piAU > 0),]
div.hiUKpi.vsUS <- div[which(div$piUK.piUS > 0),]
div.hiUKpi.both <- div.hiUKpi.vsAU[which(div$piUK.piUS > 0),]
length(div.hiUKpi.vsAU$piUK.piAU)/length(div$piUK.piAU) # % of windows that have higher pi in native
## [1] 0.7049973
length(div.hiUKpi.vsUS$piUK.piUS)/length(div$piUK.piUS)
## [1] 0.7432116
length(div.hiUKpi.both$piUK.piUS)/length(div$piUK.piUS) # % of windows with higher pi in native for both invasive ranges
## [1] 0.7432116
If drift acting on novel mutations explains most of the differentiation, then where FST is high, diversity should also be higher in the invasions. Could also be due to weaker purifying selection during population expansion.
descdist(div.hifst.AUUK$piUK.piAU)
## summary statistics
## ------
## min: -0.00119697 max: 0.00202796
## median: 3.283325e-05
## mean: 0.0001246263
## estimated sd: 0.0004012797
## estimated skewness: 0.807314
## estimated kurtosis: 4.950103
div.hifst.AUUK.hiUKpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU > 0),]
length(div.hifst.AUUK.hiUKpi$piUK.piAU)/length(div.hifst.AUUK$piUK.piAU) # % of high FST windows that have higher pi in native
## [1] 0.6224189
descdist(div.hifst.UKUS$piUK.piUS)
## summary statistics
## ------
## min: -0.00119758 max: 0.00176181
## median: 2.5496e-05
## mean: 6.588734e-05
## estimated sd: 0.0003507603
## estimated skewness: 0.6636127
## estimated kurtosis: 6.832871
div.hifst.UKUS.hiUKpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS > 0),]
length(div.hifst.UKUS.hiUKpi$piUK.piUS)/length(div.hifst.UKUS$piUK.piUS)
## [1] 0.6989619
div.hiUKpi.both <- div.hifst.UKUS.hiUKpi[which(div.hifst.AUUK.hiUKpi$piUK.piUS > 0),]
length(div.hiUKpi.both$piUK.piUS)
## [1] 295
div.hifst.AUUK.hiAUpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU < 0),]
div.hifst.UKUS.hiUSpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS < 0),]
lab.AU <- rep("AU",length(div$piUK.piAU))
lab.US <- rep("US",length(div$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div$piUK.piUS,div$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame': 40142 obs. of 2 variables:
## $ pi : num -1.04e-05 -1.83e-04 2.09e-04 8.63e-05 1.79e-04 ...
## $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_vline(xintercept=0,colour="black",size=0.5) +
geom_vline(xintercept=0.0005,colour="black",size=0.5) +
geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("Difference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
theme(legend.position="none")
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_vline(xintercept=0,colour="black",size=0.5) +
geom_vline(xintercept=0.0005,colour="black",size=0.5) +
geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("Difference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
theme(legend.position="none")
dev.off()
## quartz_off_screen
## 2
lab.AU <- rep("AU",length(div.hifst.AUUK$piUK.piAU))
lab.US <- rep("US",length(div.hifst.UKUS$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div.hifst.UKUS$piUK.piUS,div.hifst.AUUK$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame': 967 obs. of 2 variables:
## $ pi : num 1.88e-04 8.97e-05 1.13e-04 4.36e-04 2.19e-04 ...
## $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_vline(xintercept=0,colour="black",size=0.5) +
geom_vline(xintercept=0.0005,colour="black",size=0.5) +
geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("Difference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
theme(legend.position="none")
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference_HiFst.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
geom_vline(xintercept=0,colour="black",size=0.5) +
geom_vline(xintercept=0.0005,colour="black",size=0.5) +
geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
scale_fill_manual(values=c("#2c81a8","#F2C14E")) +
xlab("Difference in pi") + labs(fill="Population") +
geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
theme(legend.position="none")
dev.off()
## quartz_off_screen
## 2
Are regions with novel pi also highly differentiated? Expect this scatterplot to look bimodal, where shifts in diversity in either direction led to differentiation between populations.
ggplot(data=div) +
geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
xlab("") + ylab("") +
stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),method="loess",col="black",lwd=0.5) +
xlim(0,0.31) + ylim(-0.002,0.002) + theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
ggplot(data=div) +
geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
#xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
xlab("") + ylab("") +
stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),method="loess",col="black",lwd=0.5) +
xlim(0,0.31) + ylim(-0.002,0.002) + theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
ggplot(data=div) +
geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
xlab("Fst (Native vs. Invasive)") + ylab("Pi Native - Pi Invasive") +
xlim(-0.01,0.41) + ylim(-0.003,0.003) + theme_bw() +
geom_density_2d(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU), colour="#ffffff") +
geom_density_2d(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS), colour="#2c81a8") +
guides(col = guide_legend(label = TRUE, label.position = "bottom",
direction = "horizontal"))
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.pdf",height=5,width=5)
ggplot(data=div) +
geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
xlab("Fst (Native vs. Invasive)") + ylab("Pi Native - Pi Invasive") +
xlim(-0.01,0.41) + ylim(-0.003,0.003) + theme_bw() +
geom_density_2d(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU), colour="#ffffff") +
geom_density_2d(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS), colour="#2c81a8") +
guides(col = guide_legend(label = TRUE, label.position = "bottom",
direction = "horizontal"))
dev.off()
## quartz_off_screen
## 2
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)
#dev.off()
In a given window, if diversity in the invasive range is higher than that of the native range, it is possible that those variants are novel mutations. This filtering will tell us whether we should look at particular genotypes in these regions.
div.novelpi <- div %>%
filter((div$piUK.piAU < 0) & (div$piUK.piUS < 0))
# % of low native diversity SNPs are higher in diversity in both invasions
# sanity check based on calc above
length(div.novelpi$SNP)/length(div$SNP)
## [1] 0.1386578
div.novelUSpi <- div %>%
filter(div$piUK.piUS < 0)
length(div.novelUSpi$SNP)/length(div$SNP)
## [1] 0.2566888
div.novelAUpi <- div %>%
filter(div$piUK.piAU < 0)
length(div.novelAUpi$SNP)/length(div$SNP)
## [1] 0.2948533
How to test whether novel pi is evenly distributed across the genome? If even, then we expect to see a random sampling of chromosomes represented in this smaller dataset.
unique(div.novelpi$CHROM) # which chromosomes have higher invasive pi in both invasions
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00 1.25 1.75 1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00 4.50
## [25] 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
unique(div.novelUSpi$CHROM) # just in US
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00 1.25 1.75 1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00 4.50
## [25] 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
unique(div.novelAUpi$CHROM) # in AU
## [1] 10.00 11.00 12.00 13.00 14.00 15.00 16.00 17.00 18.00 19.00 1.25 1.75
## [13] 1.00 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 2.00 3.00
## [25] 4.50 4.00 5.00 6.00 7.00 8.00 9.00 29.00 0.00
length(unique(div.novelpi$CHROM))/length(unique(div$CHROM))
## [1] 0.969697
length(unique(div.novelUSpi$SNP)) # are most of these higher invasive pi regions also the moderately differentiated windows
## [1] 5152
length(unique(div.hifst.UKUS$SNP))
## [1] 289
length(unique(div.hifst.UKUS$SNP))/length(unique(div.novelUSpi$SNP))
## [1] 0.05609472
length(unique(div.novelAUpi$SNP))
## [1] 5918
length(unique(div.hifst.AUUK$SNP))
## [1] 678
length(unique(div.hifst.AUUK$SNP))/length(unique(div.novelAUpi$SNP))
## [1] 0.1145657
This is a different calculation than asking whether differentiated windows are also high in invasive diversity, since we’re drawing from different sets.
Can we color points in the manhattan plot by diff in diversity
SNP.novelUSpi <- div.novelUSpi$SNP
SNP.novelAUpi <- div.novelAUpi$SNP
SNP.novelAUpi.hifstonly <- div.hifst.AUUK.hiAUpi$SNP
SNP.novelUSpi.hifstonly <- div.hifst.UKUS.hiUSpi$SNP
length(div.hifst.UKUS.hiUSpi$SNP)
## [1] 87
length(div.hifst.AUUK.hiAUpi$SNP)
## [1] 256
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelUSpi.hifstonly,
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.UKUS.Manhattan.pdf",w=12,h=3)
#dev.off()
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelAUpi.hifstonly,
cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.AUUK.Manhattan.pdf",w=12,h=3)
#dev.off()
div.outliers.hiAUpi <- div.outliers.AUUK[which(div.outliers.AUUK$piUK.piAU < 0),]
length(div.outliers.hiAUpi$SNP)
## [1] 91
length(div.outliers.hiAUpi$SNP)/length(div.outliers.AUUK$SNP)
## [1] 0.4527363
% of FST outlier windows have higher diversity in AU
div.outliers.hiUSpi <- div.outliers.USUK[which(div.outliers.USUK$piUK.piUS < 0),]
length(div.outliers.hiUSpi$SNP)
## [1] 61
length(div.outliers.hiUSpi$SNP)/length(div.outliers.USUK$SNP)
## [1] 0.3034826
% of FST outlier windows have higher diversity in US
intersect(div.outliers.hiUSpi$CHROM,div.outliers.hiAUpi$CHROM)
## [1] 12.00 1.25 1.00 2.00 3.00 6.00 0.00
unique(div.outliers.hiAUpi$CHROM)
## [1] 12.00 13.00 1.25 1.00 23.00 27.00 2.00 3.00 4.50 5.00 6.00 0.00
unique(div.outliers.hiUSpi$CHROM)
## [1] 12.00 17.00 1.25 1.00 2.00 3.00 4.00 6.00 0.00
The plots below are based on code from Gemma Clucas.
chrom2.div <- div[which(div$CHROM==2),]
manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelAUpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom2.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelUSpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom2.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
chrom2.div.small <- chrom2.div[which(chrom2.div$SNP < 8980),]
chrom2.div.small <- chrom2.div.small[which(chrom2.div.small$SNP > 8600),]
# 39200001 to 51650000
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1)) # set rows
par(mar=c(0,2,0.5,2)) # set margins for each plot
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.5))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.04))
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4)) # tajima's D axis
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome2.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom6.div <- div[which(div$CHROM==6),]
# runs from 16155 to 16871
# chrom 6 = 716 50kb windows ("SNPs" here)
manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelAUpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom6.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelUSpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom6.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
chrom6.div.small <- chrom6.div[which(chrom6.div$SNP < 16325),]
chrom6.div.small <- chrom6.div.small[which(chrom6.div.small$SNP > 16225),]
head(chrom6.div.small)
## CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 16226 6 3550001 3600000 570 0.01206770 0.008669440
## 16227 6 3600001 3650000 528 0.03079550 0.023766500
## 16228 6 3650001 3700000 701 0.02597620 0.020965100
## 16229 6 3700001 3750000 805 0.00338468 0.000400509
## 16230 6 3750001 3800000 830 0.02082790 0.017915200
## 16231 6 3800001 3850000 800 0.00589529 0.001372410
## WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 16226 0.0367649 0.0305788 0.0392762 0.0368076
## 16227 0.0501026 0.0406660 0.0739191 0.0613789
## 16228 0.0711590 0.0593933 0.0454643 0.0368937
## 16229 0.0754966 0.0604820 0.0708642 0.0562380
## 16230 0.0375712 0.0322166 0.0438655 0.0358050
## 16231 0.1001270 0.0672706 0.0743417 0.0579428
## PI_UK PI_US PI_AU TajimaD_UK TajimaD_US TajimaD_AU SNP
## 16226 0.00366513 0.00384209 0.00372947 0.592114 0.972481 0.785959 16226
## 16227 0.00349155 0.00329177 0.00364804 0.682877 0.782069 0.892366 16227
## 16228 0.00468438 0.00458936 0.00478475 0.859767 0.784092 0.972961 16228
## 16229 0.00535166 0.00537373 0.00535428 0.706416 0.819633 0.873451 16229
## 16230 0.00555363 0.00537467 0.00539254 0.840503 0.753410 0.770190 16230
## 16231 0.00531258 0.00519576 0.00516348 0.695610 0.790285 0.812897 16231
## piUK.piAU piUK.piUS
## 16226 -0.00006434 -0.00017696
## 16227 -0.00015649 0.00019978
## 16228 -0.00010037 0.00009502
## 16229 -0.00000262 -0.00002207
## 16230 0.00016109 0.00017896
## 16231 0.00014910 0.00011682
chrom6.div.hifst <- chrom6.div[which(chrom6.div$SNP < 16281),]
chrom6.div.hifst <- chrom6.div.small[which(chrom6.div.small$SNP > 16263),]
# AUUK high fst: window 5350001 to 6300001 on Chrom 6
# "SNP" 16263 to 16281
mean(chrom6.div.hifst$PI_UK)
## [1] 0.002873224
mean(chrom6.div.hifst$PI_US)
## [1] 0.002896161
mean(chrom6.div.hifst$PI_AU)
## [1] 0.00293987
chrom6.div.med <- chrom6.div[which(chrom6.div$SNP < 16450),]
chrom6.div.med <- chrom6.div.med[which(chrom6.div.med$SNP > 16155),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
quartz(height=3,width=7)
options(scipen=999)
par(new=T)
## Warning in par(new = T): calling par(new=TRUE) with no plot
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.01,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.BroaderFSTaroundPeak.pdf", type="pdf")
## quartz_off_screen
## 2
chrom1.div <- div[which(div$CHROM==1),]
manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelAUpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom1.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelUSpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom1.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
chrom1.div.small <- chrom1.div[which(chrom1.div$SNP < 6700),]
chrom1.div.small <- chrom1.div.small[which(chrom1.div.small$SNP > 6400),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom1A.div <- div[which(div$CHROM==1.25),]
# 2896 to 4342
# 1446 windows, 3 ticks
manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelAUpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom1A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.
manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelUSpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom1A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.
chrom1A.div.small <- chrom1A.div[which(chrom1A.div$SNP < 4000),]
chrom1A.div.small <- chrom1A.div.small[which(chrom1A.div.small$SNP > 3700),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom4.div <- div[which(div$CHROM==4),]
# 13506 to 14923,
# 1417 windows, 7 ticks - peak ~500 windows from start
manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelAUpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom4.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelUSpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom4.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.
chrom4.div.small <- chrom4.div[which(chrom4.div$SNP < 14150),]
chrom4.div.small <- chrom4.div.small[which(chrom4.div.small$SNP > 13850),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2
chrom4A.div <- div[which(div$CHROM==4.5),]
# 13097 to 13505
# 408 windows, 4 ticks
manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelAUpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom4A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.
manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS",
ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
highlight=SNP.novelUSpi.hifstonly,
cex=1,cex.axis=1)
## Warning in manhattan(chrom4A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.
chrom4A.div.small <- chrom4A.div[which(chrom4A.div$SNP < 13300),]
chrom4A.div.small <- chrom4A.div.small[which(chrom4A.div.small$SNP > 13150),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE, ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA, bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)
quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen
## 2